# Table 4: Language Use of Stated Objectives of 13D Filings

###################
# financial vs nonfinancial
##################


# specificity
# `pd` is used throughout this script but never imported in it; import it
# here so the script runs standalone (a no-op if pandas is already loaded).
import pandas as pd

# Pipe-delimited specificity data, one row per filing.
df_2types = pd.read_csv('data/accno_cik_type_ner.txt', sep='|')

import statsmodels.formula.api as smf

# OLS of pct7 on the categorical `type`: the intercept is the mean of the
# baseline category and the first coefficient is the other category's
# difference from that baseline.
reg1 = smf.ols(formula="pct7 ~ C(type) ", data=df_2types).fit()

print('measure | nonfinancial | financial | p-value of diff')
print('-'*60)
print('specificity | ', end='')
# Means are printed scaled by 100.
# NOTE(review): the p-value is also multiplied by 100 (i.e. shown in percent)
# -- confirm this is intended.
# NOTE(review): assumes the baseline level of `type` is the nonfinancial
# group -- confirm against the coding used in the data file.
print(' | '.join([
    format((reg1.params.iloc[0])*100, ' 0.2f'),
    format((reg1.params.iloc[0] + reg1.params.iloc[1])*100, ' 0.2f'),
    format(reg1.pvalues.iloc[1]*100, '0.3f'),
]))

# other measures

# Tone word counts per filing; the file is read with explicit column names.
df_2types = pd.read_csv(
    'data/accno_cik_type_tone.txt',
    sep='|',
    names=['accno', 'cik', 'type', 'weak', 'strong', 'pos', 'neg', 'unc', 'nwords'],
)

# Rescale every word count to a rate per 1,000 words: count / (nwords / 1000).
df_2types = df_2types.assign(**{
    col: df_2types[col]/(df_2types['nwords']/1000)
    for col in ('weak', 'strong', 'pos', 'neg', 'unc')
})

# Net measures, computed from the rescaled rates.
df_2types = df_2types.assign(
    weak_minus_strong=lambda d: d['weak'] - d['strong'],
    neg_minus_pos=lambda d: d['neg'] - d['pos'],
)


import statsmodels.formula.api as smf

def tab(words):
    """Print one table row for the measure stored in column ``words``.

    Regresses ``words`` on ``C(type)`` using the module-level ``df_2types``
    and prints, separated by ``' | '``: the measure name, the intercept, the
    intercept plus the first ``type`` coefficient, and that coefficient's
    p-value multiplied by 100.
    """
    fitted = smf.ols(formula=f"{words} ~ C(type) ", data=df_2types).fit()
    cells = [
        f'{words}',
        format(fitted.params.iloc[0], ' 0.2f'),
        format(fitted.params.iloc[0] + fitted.params.iloc[1], ' 0.2f'),
        # NOTE(review): p-value is scaled by 100 -- confirm this is intended.
        format(fitted.pvalues.iloc[1]*100, '0.3f'),
    ]
    print(' | '.join(cells))

# Table header and rule, followed by one row per tone measure (tab() prints
# each row).
print('measure | nonfinancial | financial | p-value of diff')
print('-'*60)

for _measure in (
    'weak',
    'strong',
    'weak_minus_strong',
    'unc',
    'neg',
    'pos',
    'neg_minus_pos',
):
    tab(_measure)

###################
# 4 types
##################

# specificity
 
# Pipe-delimited specificity data for the four-way filer classification.
df_4types = pd.read_csv('data/accno_cik_4type_ner.txt', sep='|')

import statsmodels.formula.api as smf

# OLS of pct7 on the categorical `type`; group means are rebuilt below as
# the intercept plus the matching coefficient.
reg1 = smf.ols(formula="pct7 ~ C(type)", data=df_4types).fit()

print('measure | individual | hedge funds | other private | institutional')
print('-'*60)
print('specificity | ', end='')
# Column order follows the header: intercept + 3rd param, intercept + 2nd
# param, intercept + 4th param, intercept alone; each is scaled by 100.
# NOTE(review): this assumes a particular ordering of the `type` levels --
# confirm against the coding used in the data file.
print(' | '.join(
    format(group_mean*100, ' 0.2f')
    for group_mean in (
        reg1.params.iloc[0] + reg1.params.iloc[2],
        reg1.params.iloc[0] + reg1.params.iloc[1],
        reg1.params.iloc[0] + reg1.params.iloc[3],
        reg1.params.iloc[0],
    )
) + ' ')

# other measures

# Tone word counts per filing; the file is read with explicit column names.
df_4types = pd.read_csv(
    'data/accno_cik_4types_tone.txt',
    sep='|',
    names=['accno', 'cik', 'type', 'weak', 'strong', 'pos', 'neg', 'unc', 'nwords'],
)

# Rescale every word count to a rate per 1,000 words: count / (nwords / 1000).
df_4types = df_4types.assign(**{
    col: df_4types[col]/(df_4types['nwords']/1000)
    for col in ('weak', 'strong', 'pos', 'neg', 'unc')
})

# Net measures, computed from the rescaled rates.
df_4types = df_4types.assign(
    weak_minus_strong=lambda d: d['weak'] - d['strong'],
    neg_minus_pos=lambda d: d['neg'] - d['pos'],
)

import statsmodels.formula.api as smf

def tab(words):
    """Print one four-group table row for the measure in column ``words``.

    Regresses ``words`` on ``C(type)`` using the module-level ``df_4types``
    and prints the measure name followed by four group means rebuilt from the
    fit, in the order of the table header: intercept + 3rd parameter,
    intercept + 2nd parameter, intercept + 4th parameter, and the intercept
    alone.
    """
    # The frame built for this section is `df_4types`; the previous `df_4`
    # was never defined and raised NameError on every call.
    reg1 = smf.ols(formula=f"{words} ~ C(type) ", data=df_4types).fit()
    intercept = reg1.params.iloc[0]
    # NOTE(review): the mapping of parameters to header columns assumes a
    # particular ordering of the `type` levels -- confirm against the data.
    print(f'{words} | {(intercept + reg1.params.iloc[2]) : 0.2f} | {(intercept + reg1.params.iloc[1]) : 0.2f} | {(intercept + reg1.params.iloc[3]) : 0.2f} | {intercept: 0.2f} ')

# Table header and rule, followed by one row per tone measure (tab() prints
# each row).
print('measure | individual | hedge funds | other private | institutional')
print('-'*60)

for _measure in (
    'weak',
    'strong',
    'weak_minus_strong',
    'unc',
    'neg',
    'pos',
    'neg_minus_pos',
):
    tab(_measure)


###################
# short-term vs long-term
##################



# specificity
# Pipe-delimited specificity data for the short-term / long-term split.
df_stlt = pd.read_csv('data/accno_cik_stlt_ner.txt', sep='|')

import statsmodels.formula.api as smf

# OLS of pct7 on the categorical `type`: intercept = baseline group mean,
# first coefficient = difference of the other group from that baseline.
reg1 = smf.ols(formula="pct7 ~ C(type) ", data=df_stlt).fit()

print('measure | short-term | long-term | p-value of diff')
print('-'*60)
print('specificity | ', end='')
# Here the non-baseline group is printed first and the intercept second.
# NOTE(review): assumes the baseline level of `type` is the long-term group,
# and that printing the p-value times 100 is intended -- confirm both.
print(' | '.join([
    format((reg1.params.iloc[0] + reg1.params.iloc[1])*100, ' 0.2f'),
    format((reg1.params.iloc[0])*100, ' 0.2f'),
    format(reg1.pvalues.iloc[1]*100, '0.3f'),
]))

# other measures

# Tone word counts per filing; the file is read with explicit column names.
df_stlt = pd.read_csv(
    'data/accno_cik_st_lt_tone.txt',
    sep='|',
    names=['accno', 'cik', 'type', 'weak', 'strong', 'pos', 'neg', 'unc', 'nwords'],
)

# Rescale every word count to a rate per 1,000 words: count / (nwords / 1000).
df_stlt = df_stlt.assign(**{
    col: df_stlt[col]/(df_stlt['nwords']/1000)
    for col in ('weak', 'strong', 'pos', 'neg', 'unc')
})

# Net measures, computed from the rescaled rates.
df_stlt = df_stlt.assign(
    weak_minus_strong=lambda d: d['weak'] - d['strong'],
    neg_minus_pos=lambda d: d['neg'] - d['pos'],
)


import statsmodels.formula.api as smf

def tab(words):
    """Print one short-term/long-term table row for column ``words``.

    Regresses ``words`` on ``C(type)`` using the module-level ``df_stlt`` and
    prints, separated by ``' | '``: the measure name, the intercept plus the
    first ``type`` coefficient, the intercept alone, and that coefficient's
    p-value multiplied by 100.
    """
    fitted = smf.ols(formula=f"{words} ~ C(type) ", data=df_stlt).fit()
    cells = [
        f'{words}',
        format(fitted.params.iloc[0] + fitted.params.iloc[1], ' 0.2f'),
        format(fitted.params.iloc[0], ' 0.2f'),
        # NOTE(review): p-value is scaled by 100 -- confirm this is intended.
        format(fitted.pvalues.iloc[1]*100, '0.3f'),
    ]
    print(' | '.join(cells))

# Table header and rule, followed by one row per tone measure (tab() prints
# each row).
# NOTE(review): this header labels the first column 'topic' while the other
# headers in this script use 'measure' -- confirm which label is intended.
print('topic | short-term | long-term | p-value of diff')
print('-'*60)

for _measure in (
    'weak',
    'strong',
    'weak_minus_strong',
    'unc',
    'neg',
    'pos',
    'neg_minus_pos',
):
    tab(_measure)